{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c33e52ac-a587-41c3-b487-6a1533043a54",
   "metadata": {},
   "source": [
    "# **Code Used In NumPy Practical Examples: Useful Techniques**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29aff3d0-0427-4dd8-b14f-bc7ce9002a76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !python -m pip install numpy\n",
    "# !python -m pip install matplotlib"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b740161-a80e-4d37-8c26-2dca52c68dce",
   "metadata": {},
   "source": [
    "**Example 1: Creating Multi-Dimensional Arrays From Files**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e64a11c1-a0b5-49f8-a0a6-dfe0adf674f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "array = np.zeros((3, 2, 3))\n",
    "print(id(array))\n",
    "\n",
    "for file_count, csv_file in enumerate(sorted(Path.cwd().glob(\"file?.csv\"))):\n",
    "    array[file_count] = np.loadtxt(csv_file.name, delimiter=\",\")\n",
    "\n",
    "print(id(array))\n",
    "print(array.shape)\n",
    "array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8de90c08-9bdc-42f3-ad4e-d7a34fca8215",
   "metadata": {},
   "outputs": [],
   "source": [
    "array = np.zeros((4, 2, 3))\n",
    "\n",
    "for file_count, csv_file in enumerate(sorted(Path.cwd().glob(\"file?.csv\"))):\n",
    "    array[file_count] = np.loadtxt(csv_file.name, delimiter=\",\")\n",
    "\n",
    "array[3, 0] = np.loadtxt(\"short_file.csv\", delimiter=\",\")\n",
    "\n",
    "array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63775265-deb1-4cca-bd7c-547b3d49f5ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "array = np.zeros((4, 2, 3))\n",
    "print(id(array))\n",
    "\n",
    "for file_count, csv_file in enumerate(sorted(Path.cwd().glob(\"file?.csv\"))):\n",
    "    array[file_count] = np.loadtxt(csv_file.name, delimiter=\",\")\n",
    "\n",
    "array = np.insert(arr=array, obj=2, values=0, axis=1)\n",
    "array[3] = np.loadtxt(\"long_file.csv\", delimiter=\",\")\n",
    "\n",
    "print(id(array))\n",
    "array"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fd4544ac-53bc-449c-88a2-f2496bba8f55",
   "metadata": {},
   "source": [
    "**Example 2: Reconciling Data Using Structured NumPy Arrays**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d760bcd-1be5-4396-8eb8-d729768909d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "race_results = np.array(\n",
    "    [\n",
    "        (\"At The Back\", 1.2, 3),\n",
    "        (\"Fast Eddie\", 1.3, 1),\n",
    "        (\"Almost There\", 1.1, 2),\n",
    "    ],\n",
    "    dtype=[\n",
    "        (\"horse_name\", \"U12\"),\n",
    "        (\"price\", \"f4\"),\n",
    "        (\"position\", \"i4\"),\n",
    "    ],\n",
    ")\n",
    "\n",
    "race_results[\"horse_name\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d3ae083-2f62-45a2-a761-c021484ab9d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.sort(race_results, order=\"position\")[[\"horse_name\", \"price\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b08dc1c-1797-4e54-a596-f5240150387c",
   "metadata": {},
   "outputs": [],
   "source": [
    "race_results[race_results[\"position\"] == 1][\"horse_name\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0df725d-10bb-47db-9213-5cdb20e3b2ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy.lib.recfunctions as rfn\n",
    "\n",
    "issued_dtypes = [\n",
    "    (\"id\", \"i8\"),\n",
    "    (\"payee\", \"U10\"),\n",
    "    (\"amount\", \"f8\"),\n",
    "    (\"date_issued\", \"U10\"),\n",
    "]\n",
    "cashed_dtypes = [(\"id\", \"i8\"), (\"amount\", \"f8\"), (\"date_cashed\", \"U10\")]\n",
    "\n",
    "issued_checks = np.loadtxt(\n",
    "    Path(\"issued_checks.csv\"), delimiter=\",\", dtype=issued_dtypes, skiprows=1\n",
    ")\n",
    "cashed_checks = np.loadtxt(\n",
    "    Path(\"cashed_checks.csv\"), delimiter=\",\", dtype=cashed_dtypes, skiprows=1\n",
    ")\n",
    "\n",
    "cashed_check_details = rfn.rec_join(\n",
    "    \"id\", issued_checks, cashed_checks, jointype=\"inner\"\n",
    ")\n",
    "cashed_check_details[[\"payee\", \"date_issued\", \"date_cashed\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2faa8285-ae4f-4808-8ee1-aeb5bc21fbb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The following gives an error, the amount column has been renamed\n",
    "#\n",
    "# cashed_check_details[[\"payee\", \"date_issued\", \"date_cashed\", \"amount\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94cc2c27-9dec-4dbe-a23f-2a3dba403b70",
   "metadata": {},
   "outputs": [],
   "source": [
    "cashed_check_details[[\"payee\", \"date_issued\", \"date_cashed\", \"amount1\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34c8719b-1ee1-4635-8bdc-bf8fa624485e",
   "metadata": {},
   "outputs": [],
   "source": [
    "outstanding_checks = [\n",
    "    check_id\n",
    "    for check_id in issued_checks[\"id\"]\n",
    "    if check_id not in cashed_checks[\"id\"]\n",
    "]\n",
    "\n",
    "[int(i) for i in outstanding_checks]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce0831b0-e8d0-45cf-be1f-8a71c5493f35",
   "metadata": {},
   "outputs": [],
   "source": [
    "[i for i in cashed_checks[\"id\"] if i not in issued_checks[\"id\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4d62074-c38b-4bd6-b8b9-d569ceceeab6",
   "metadata": {},
   "source": [
    "**NumPy Example 3: Analyzing and Charting Hierarchical Data**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "861a2d6b-392d-4de8-a694-38d23ac21f24",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "days = [\"mon\", \"tue\", \"wed\", \"thu\", \"fri\"]\n",
    "days_dtype = [(day, \"f8\") for day in days]\n",
    "company_dtype = [(\"company\", \"U20\"), (\"sector\", \"U20\")]\n",
    "\n",
    "portfolio_dtype = np.dtype(company_dtype + days_dtype)\n",
    "portfolio = np.zeros((6,), dtype=portfolio_dtype)\n",
    "portfolio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52c7366d-5556-4680-93ee-c0a6467a4d08",
   "metadata": {},
   "outputs": [],
   "source": [
    "companies = np.loadtxt(\n",
    "    Path(\"portfolio.csv\"),\n",
    "    delimiter=\",\",\n",
    "    dtype=company_dtype,\n",
    "    skiprows=1,\n",
    ").reshape((6,))\n",
    "\n",
    "portfolio[[\"company\", \"sector\"]] = companies\n",
    "portfolio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd244b7a-c919-4f69-bad2-fd70dfe68195",
   "metadata": {},
   "outputs": [],
   "source": [
    "share_prices_dtype = [\n",
    "    (\"company\", \"U20\"),\n",
    "    (\"day\", \"f8\"),\n",
    "]\n",
    "\n",
    "for day, csv_file in zip(\n",
    "    days, sorted(Path.cwd().glob(\"share_prices-?.csv\")), strict=False\n",
    "):\n",
    "    portfolio[day] = np.loadtxt(\n",
    "        csv_file.name,\n",
    "        delimiter=\",\",\n",
    "        dtype=share_prices_dtype,\n",
    "        skiprows=1,\n",
    "    )[\"day\"]\n",
    "\n",
    "portfolio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6a50547-5527-49fd-9c14-9f862d0724d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_array = portfolio[portfolio[\"company\"] == \"Company_C\"]\n",
    "filtered_array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1249605-ff28-4954-a4f4-f523e7a17388",
   "metadata": {},
   "outputs": [],
   "source": [
    "portfolio[portfolio[\"sector\"] == \"technology\"][\"fri\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17c86bd6-5c68-4bd8-ac2c-148b0c383abf",
   "metadata": {},
   "outputs": [],
   "source": [
    "portfolio[portfolio[\"sector\"] == \"technology\"][\"fri\"] * 250 * 0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c58c7e9a-2317-4537-9c81-8f306d1adcae",
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(portfolio[portfolio[\"sector\"] == \"technology\"][\"fri\"] * 250 * 0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42fe3933-cc9d-4643-87f2-fda5736a3934",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "tech_mask = portfolio[\"sector\"] == \"technology\"\n",
    "tech_sector = portfolio[tech_mask][\"company\"]\n",
    "tech_valuation = portfolio[tech_mask][\"fri\"] * 250 * 0.01\n",
    "\n",
    "plt.bar(x=tech_sector, height=tech_valuation, data=tech_valuation)[\n",
    "    0\n",
    "].set_color(\"g\")\n",
    "\n",
    "plt.xlabel(\"Tech Companies\")\n",
    "plt.ylabel(\"Friday Price ($)\")\n",
    "plt.title(\"Tech Share Valuation ($)\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77f606fb-e0c5-4aa2-bbbe-894642997840",
   "metadata": {},
   "outputs": [],
   "source": [
    "cities = [\"london\", \"new_york\", \"rome\"]\n",
    "cities_dtype = [(city, \"i8\") for city in cities]\n",
    "city_files_dtype = [(\"month\", \"U20\"), (\"temp\", \"i8\")]\n",
    "weather_data_dtype = np.dtype([(\"month\", \"U20\")] + cities_dtype)\n",
    "weather_data = np.zeros((12,), dtype=weather_data_dtype)\n",
    "\n",
    "for city in cities:\n",
    "    temps = np.loadtxt(\n",
    "        Path(f\"{city}_temperatures.csv\"),\n",
    "        delimiter=\",\",\n",
    "        dtype=city_files_dtype,\n",
    "    )\n",
    "    weather_data[[\"month\", city]] = temps\n",
    "\n",
    "weather_data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6b6fb23-87ee-4cf0-a750-431f016c6fc0",
   "metadata": {},
   "source": [
    "**Example 4: Writing Your Own Vectorization Functions**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a804ca44-f259-4151-bc0f-a9298999a1e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "share_dtypes = [\n",
    "    (\"company\", \"U20\"),\n",
    "    (\"sector\", \"U20\"),\n",
    "    (\"mon\", \"f8\"),\n",
    "    (\"tue\", \"f8\"),\n",
    "    (\"wed\", \"f8\"),\n",
    "    (\"thu\", \"f8\"),\n",
    "    (\"fri\", \"f8\"),\n",
    "]\n",
    "\n",
    "portfolio = np.loadtxt(\n",
    "    Path(\"full_portfolio.csv\"),\n",
    "    delimiter=\",\",\n",
    "    dtype=share_dtypes,\n",
    "    skiprows=1,\n",
    ")\n",
    "\n",
    "portfolio[\"fri\"] - portfolio[\"mon\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3be836cd-5c7e-4218-95f3-fd5b94b22157",
   "metadata": {},
   "outputs": [],
   "source": [
    "def profit_with_bonus(first_day, last_day):\n",
    "    if last_day >= first_day * 1.01:\n",
    "        return (last_day - first_day) * 1.1\n",
    "    else:\n",
    "        return last_day - first_day\n",
    "\n",
    "\n",
    "# The following causes an error because in_profit() doesn't know\n",
    "# how to work with NumPy arrays:\n",
    "#\n",
    "# profit_with_bonus(portfolio[\"mon\"], portfolio[\"fri\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35dab65e-baf0-4a0a-b773-0ec7133b1b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorized_profit_with_bonus = np.vectorize(profit_with_bonus)\n",
    "vectorized_profit_with_bonus(portfolio[\"mon\"], portfolio[\"fri\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acf8cad6-e93a-4ee9-817c-ffffdc4b825e",
   "metadata": {},
   "outputs": [],
   "source": [
    "profit_with_bonus(3, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f789fa62-e8e0-45c8-973f-b33ee54e3262",
   "metadata": {},
   "outputs": [],
   "source": [
    "@np.vectorize\n",
    "def profit_with_bonus(first_day, last_day):\n",
    "    if last_day >= first_day * 1.01:\n",
    "        return (last_day - first_day) * 1.1\n",
    "    else:\n",
    "        return last_day - first_day\n",
    "\n",
    "\n",
    "profit_with_bonus(portfolio[\"mon\"], portfolio[\"fri\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "257d37f9-5b7f-4ad6-beef-1c8f300a8347",
   "metadata": {},
   "outputs": [],
   "source": [
    "profit_with_bonus(3, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfec4cdd-db86-4cfe-a325-0597ff2a4f94",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.where(\n",
    "    portfolio[\"fri\"] > portfolio[\"mon\"] * 1.01,\n",
    "    (portfolio[\"fri\"] - portfolio[\"mon\"]) * 1.1,\n",
    "    portfolio[\"fri\"] - portfolio[\"mon\"],\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
